For the reader: You can click the black “code” box on the right hand side to either see or hide the R code behind analysis results and graphs.

The results are arranged to separate tabs by site/area, so that it’s easy to look at just the results of the site of interest, instead of scrolling accidentally down to a different site and getting confused.

The results for each site consist of four parts: pXRF, particle size distribution, loss on ignition, and colorimetry.

Sites

Ashdod-Yam

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AY.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[-c(1:3)] <- pxrf[-c(1:3)] %>% mutate_if(is.character,as.numeric)

# Transforming NA values to zeros
pxrf[is.na(pxrf)] <- 0

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$'Group.1'
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[-c(1:2)] <- scale(pxrf_scaled[-c(1:2)])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_all: All possible elements pxrf_errors: Means of available values and error values for a quick glimpse

colnames(is.na(pxrf_all[-c(1:2)]))
##  [1] "MgO"   "Al2O3" "SiO2"  "P2O5"  "S"     "Cl"    "K2O"   "CaO"   "Ti"   
## [10] "V"     "Cr"    "Mn"    "Fe"    "Ni"    "Cu"    "Zn"    "As"    "Rb"   
## [19] "Sr"    "Y"     "Zr"    "Nb"    "Mo"    "Rh"    "Ag"    "Sn"    "Ba"   
## [28] "La"    "Ta"    "Pb"    "Bi"
pxrf_errors <- pxrf_averaged %>% select_if(all_na)
pxrf_errors <- colMeans(pxrf_errors[-c(1:2)])
pxrf_errors <- (pxrf_errors * 1000)
pxrf_errors <- round(pxrf_errors, digits = 2)
pxrf_errors
##       MgO   MgO;Err     Al2O3 Al2O3;Err      SiO2  SiO2;Err      P2O5  P2O5;Err 
##   2243.75    938.32   6499.80    191.82  38740.11    236.44     78.00     19.35 
##         S     S;Err        Cl    Cl;Err       K2O   K2O;Err       CaO   CaO;Err 
##     26.88      9.54     27.90      6.61    644.46      7.54   4518.91     16.14 
##        Ti    Ti;Err         V     V;Err        Cr    Cr;Err        Mn    Mn;Err 
##    360.27      3.83      2.85      1.29      0.59      1.02     34.35      2.35 
##        Fe    Fe;Err        Co    Co;Err        Ni    Ni;Err        Cu    Cu;Err 
##   1725.97      9.26      0.00      1.29      1.67      0.51      2.33      0.31 
##        Zn    Zn;Err        As    As;Err        Se    Se;Err        Rb    Rb;Err 
##      2.06      0.30      0.32      0.20      0.00      0.22      1.97      0.27 
##        Sr    Sr;Err         Y     Y;Err        Zr    Zr;Err        Nb    Nb;Err 
##      9.11      0.35      1.22      0.37     16.21      0.34      0.49      0.33 
##        Mo    Mo;Err        Rh    Rh;Err        Pd    Pd;Err        Ag    Ag;Err 
##      0.08      0.42      0.08      0.57      0.00      1.48      0.17      0.79 
##        Cd    Cd;Err        Sn    Sn;Err        Sb    Sb;Err        Ba    Ba;Err 
##      0.00      1.52      0.37      9.66      0.00      3.23      5.92     11.77 
##        La    La;Err        Ce    Ce;Err        Hf    Hf;Err        Ta    Ta;Err 
##      0.77     16.96      0.00      0.40      0.00      0.27      0.03      0.40 
##         W     W;Err        Pt    Pt;Err        Au    Au;Err        Hg    Hg;Err 
##      0.00      0.57      0.00      0.50      0.00      0.83      0.00      0.28 
##        Tl    Tl;Err        Pb    Pb;Err        Bi    Bi;Err        Th    Th;Err 
##      0.00      0.24      0.06      0.84      0.02      0.95      0.00      1.25 
##         U     U;Err 
##      0.00      3.10

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-As) %>% 
        select(-Nb) %>% 
        select(-Ba) %>% 
        select(-MgO) %>% 
        select(-Cu) %>% 
        select(-Cr) %>% 
        select(-Mo) %>% 
        select(-Rh) %>% 
        select(-Ag) %>% 
        select(-Sn) %>% 
        select(-La) %>% 
        select(-Ta) %>% 
        select(-Pb) %>% 
        select(-Bi)


# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V"  "Mn" "Fe" "Zn" "Rb" "Sr" "Y"  "Zr"

pXRF: K means cluster analysis

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:10], 4), data = pxrf_final, label = TRUE, label.size = 3)

pXRF: PCA

PCA with final elements:

# Elements
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V"  "Mn" "Fe" "Zn" "Rb" "Sr" "Y"  "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:10])
summary(pca_1)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.2132 1.0303 0.89076 0.73069 0.56172 0.49807 0.30664
## Proportion of Variance 0.6123 0.1327 0.09918 0.06674 0.03944 0.03101 0.01175
## Cumulative Proportion  0.6123 0.7450 0.84417 0.91090 0.95035 0.98136 0.99311
##                            PC8
## Standard deviation     0.23478
## Proportion of Variance 0.00689
## Cumulative Proportion  1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Ashdod-Yam elements")

autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam grouped by area")

autoplot(pca_1, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam grouped by sample type")

PCA(pxrf_final[3:10])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 47 individuals, described by 8 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    highlight_branches_col

    area <- as.factor(pxrf_final[, 1])
    n_area <- length(unique(area))
    cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
    col_area <- cols_a[area]
    labels_colors(dend) <- col_area[order.dendrogram(dend)]

    plot(dend, main="HCA with sample areas (Copper included)")
    rect.dendrogram(dend, 7, border = "Black", lty = 5)
    legend("topright", legend = levels(area), fill = cols_a)

# HCA dendrogram, samples color coded by type:
dend3 <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    highlight_branches_col

    type <- as.factor(pxrf_final[, 2])
    n_type <- length(unique(type))
    cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l  = 50)
    col_type <- cols_t[type]
    labels_colors(dend3) <- col_type[order.dendrogram(dend3)]

    plot(dend3, main="HCA with sample types (NO COPPER)")
    rect.dendrogram(dend3, 6, border = "Black", lty = 5)
    legend("topright", legend = levels(type), fill = cols_t)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern);
library(reshape2)

# Read and filter data
grain <- read.xlsx("data/grain_AY.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=2)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Line plots

# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample") 

ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line(size= 1) +
    ggtitle("Grain size distribution curves")

# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample") 

ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'fill') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel); # Preventing labels from overlapping
library(reshape2) # Melt-function for reshaping data to long form

loi <- read.xlsx("data/loi_AY.xlsx", sep=";")
tga <- read.xlsx("data/tga_AY.xlsx", sep=";")

# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (mass after drying), "c550" (mass after 550 C), "c950" (mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

# Removing duplicates for clarity
loi <- subset(loi[1:47, ])
tga <- subset(tga[-c(18,20,47,49), ])

# Transforming data to long form 
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") 

tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") 

# Long form data for stacked barplots
loi_long2 <- melt(loi[c(1,13:14)], value.name = "Temp", id= "sample") 

tga_long2 <- melt(tga[c(1,10:11)], value.name = "Temp", id= "Name")

LOI dataset:

loi[c(8:9,13:14)]
##                  context        type     c550      c950
## AY-1    Area D acropolis    mudbrick 2.088275 4.8891890
## AY-2    Area D acropolis    mudbrick 2.210488 1.9841768
## AY-3    Area D acropolis    mudbrick 1.360910 2.2488113
## AY-4    Area D acropolis    mudbrick 1.430588 1.8964316
## AY-5    Area D acropolis    mudbrick 1.338035 3.3256238
## AY-6    Area D acropolis mud plaster 1.480312 1.2721627
## AY-7    Area D acropolis    mudbrick 1.430335 3.1305245
## AY-8    Area D acropolis mud plaster 1.649521 1.9801623
## AY-9    Area D acropolis    mudbrick 2.368712 3.9045130
## AY-10   Area D acropolis    mudbrick 1.049666 3.1941032
## AY-11   Area D acropolis    mudbrick 1.082732 1.9786697
## AY-12   Area D acropolis         PEM 1.183502 0.7730823
## AY-13   Area D acropolis    mudbrick 1.389532 4.5525165
## AY-14   Area D acropolis    mudbrick 1.787980 2.7523295
## AY-15   Area D acropolis    mudbrick 2.233289 2.7459408
## AY-16   Area D acropolis    mudbrick 1.026364 0.9583722
## AY-17   Area D acropolis    mudbrick 1.569395 3.5356312
## AY-18   Area D acropolis    mudbrick 1.770410 2.6403398
## AY-19   Area D acropolis    mudbrick 1.454623 3.3395017
## AY-20   Area D acropolis mud plaster 1.593025 1.5216091
## AY-21   Area D acropolis    mudbrick 1.729351 1.7560317
## AY-22        Area D wall    mudbrick 1.280392 1.5344272
## AY-23        Area D wall    mudbrick 1.782016 1.9743301
## AY-24        Area D wall    mudbrick 1.697649 1.1877784
## AY-25        Area D wall    mudbrick 1.528936 2.7365019
## AY-26        Area D wall    mudbrick 1.802970 3.6061442
## AY-27        Area D wall mud plaster 1.885674 1.5169278
## AY-28        Area D wall    mudbrick 1.399589 0.7854046
## AY-29        Area D wall mud plaster 1.855100 2.0634749
## AY-30        Area D wall    mudbrick 2.552046 3.4633045
## AY-31        Area D wall    mudbrick 1.455348 3.4109094
## AY-32        Area D wall    mudbrick 1.244529 1.4577882
## AY-33        Area D wall    mudbrick 1.300813 0.8936149
## AY-34        Area D wall    mudbrick 1.581909 1.7044229
## AY-35        Area D wall    mudbrick 1.931867 2.6061513
## AY-36        Area D wall    mudbrick 2.422288 2.5205456
## AY-37        Area D wall    mudbrick 1.139581 1.0607829
## AY-38        Area D wall    mudbrick 2.130927 1.4162417
## AY-39        Area D wall  mud mortar 1.904037 2.5000000
## AY-40        Area D wall mud plaster 2.482920 2.5301253
## AY-41        Area D wall    mudbrick 2.565292 3.7553575
## AY-42 Hellenistic area A    mudbrick 1.593653 1.9123424
## AY-50    Builder's floor         PEM 1.012178 1.5457741
## AY-51    Builder's floor         PEM 1.212985 1.2668590
## AY-52    Builder's floor         PEM 1.763143 2.3906130
## AY-53    Builder's floor         PEM 1.402188 8.0417546
## AY-54    Builder's floor         PEM 1.843655 2.9683284

TGA dataset:

tga[c(8:9,10:11)]
##                  context        type     c550      c950
## AY-1    Area D acropolis    mudbrick 2.398013 2.4471417
## AY-2    Area D acropolis    mudbrick 1.736874 7.9947176
## AY-4    Area D acropolis    mudbrick 1.929141 1.4280310
## AY-3    Area D acropolis    mudbrick 1.233825 3.4734918
## AY-5    Area D acropolis    mudbrick 1.644479 3.5987261
## AY-7    Area D acropolis    mudbrick 1.505973 3.4082173
## AY-8    Area D acropolis mud plaster 1.558115 3.6606406
## AY-9    Area D acropolis    mudbrick 1.927070 3.2099602
## AY-10   Area D acropolis    mudbrick 1.176203 3.8700760
## AY-11   Area D acropolis    mudbrick 1.248990 1.3615058
## AY-12   Area D acropolis         PEM 1.274945 0.7860752
## AY-13   Area D acropolis    mudbrick 1.281796 3.7069429
## AY-14   Area D acropolis    mudbrick 1.971081 2.2591414
## AY-15   Area D acropolis    mudbrick 2.647777 2.4588519
## AY-16   Area D acropolis    mudbrick 1.329581 3.4621816
## AY-17   Area D acropolis    mudbrick 1.631206 4.5833762
## AY-18   Area D acropolis    mudbrick 1.698302 3.6382114
## AY-19   Area D acropolis    mudbrick 1.874606 3.2830310
## AY-20   Area D acropolis mud plaster 1.877608 2.0755290
## AY-21   Area D acropolis    mudbrick 1.413217 1.8372703
## AY-22        Area D wall    mudbrick 2.099634 2.0131512
## AY-23        Area D wall    mudbrick 1.749414 1.3528300
## AY-24        Area D wall    mudbrick 1.711251 1.4208525
## AY-25        Area D wall    mudbrick 1.938736 1.9671807
## AY-26        Area D wall    mudbrick 1.663242 3.9001094
## AY-27        Area D wall mud plaster 1.696577 1.6560255
## AY-28        Area D wall    mudbrick 1.259601 0.7233976
## AY-29        Area D wall mud plaster 1.724623 3.9961850
## AY-30        Area D wall    mudbrick 2.586865 3.2631063
## AY-31        Area D wall    mudbrick 1.343570 3.0447471
## AY-32        Area D wall    mudbrick 1.336629 1.4660852
## AY-33        Area D wall    mudbrick 1.268116 0.8460754
## AY-34        Area D wall    mudbrick 1.692888 1.4506317
## AY-35        Area D wall    mudbrick 2.002379 2.3973296
## AY-6    Area D acropolis mud plaster 1.659626 1.2887389
## AY-36        Area D wall    mudbrick 2.500488 2.3943098
## AY-37        Area D wall    mudbrick 1.077605 0.7994378
## AY-38        Area D wall    mudbrick 2.164785 1.6976633
## AY-39        Area D wall  mud mortar 1.915888 2.8108623
## AY-40        Area D wall mud plaster 2.618731 2.4977211
## AY-42 Hellenistic area A    mudbrick 1.655597 1.5855926
## AY-50    Builder's floor         PEM 1.347992 1.5505378
## AY-51    Builder's floor         PEM 1.282281 1.6713598
## AY-52    Builder's floor         PEM 1.492264 2.2490706
## AY-53    Builder's floor         PEM 1.276679 5.4590326
## AY-54    Builder's floor         PEM 1.651196 3.0674290

Boxplots

# Colored boxplots: Trad LOI
ggplot(loi_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Traditional LOI",
        x ="Temperature", y = "LOI")

# Colored boxplots: TGA LOI
ggplot(tga_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI",
        x ="Temperature", y = "LOI")

Biplots

# Color by context biplot: Trad LOI
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + 
      labs(title = "Traditional LOI - color by context", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

# Color by context biplot: TGA
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + 
      labs(title = "TGA LOI - color by context", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

Stacked barplots

# Trad LOI stacked barplot
ggplot(loi_long2, aes(x = reorder(sample, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Traditional LOI", 
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

# TGA LOI stacked barplot
ggplot(tga_long2, aes(x = reorder(Name, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Thermogravimeter LOI",
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AY.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :37.56   Min.   : 7.97   Min.   :16.81  
##  1st Qu.:42.13   1st Qu.: 9.72   1st Qu.:19.76  
##  Median :43.64   Median :10.38   Median :21.18  
##  Mean   :43.92   Mean   :10.49   Mean   :21.22  
##  3rd Qu.:45.91   3rd Qu.:11.29   3rd Qu.:22.34  
##  Max.   :48.24   Max.   :13.53   Max.   :25.03
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Ashdod-Yam: Byzantine

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AYB.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[-c(1:3)] <- pxrf[-c(1:3)] %>% mutate_if(is.character,as.numeric)

# Transforming NA values to zeros
pxrf[is.na(pxrf)] <- 0

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$'Group.1'
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[-c(1:2)] <- scale(pxrf_scaled[-c(1:2)])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_all: All possible elements pxrf_errors: Means of available values and error values for a quick glimpse

colnames(is.na(pxrf_all[-c(1:2)]))
##  [1] "MgO"   "Al2O3" "SiO2"  "P2O5"  "S"     "Cl"    "K2O"   "CaO"   "Ti"   
## [10] "V"     "Cr"    "Mn"    "Fe"    "Ni"    "Cu"    "Zn"    "As"    "Rb"   
## [19] "Sr"    "Y"     "Zr"    "Nb"    "Mo"    "Ag"    "Sn"    "Ba"    "Ta"   
## [28] "Pb"
pxrf_errors <- pxrf_averaged %>% select_if(all_na)
pxrf_errors <- colMeans(pxrf_errors[-c(1:2)])
pxrf_errors <- (pxrf_errors * 1000)
pxrf_errors <- round(pxrf_errors, digits = 2)
pxrf_errors
##       MgO   MgO;Err     Al2O3 Al2O3;Err      SiO2  SiO2;Err      P2O5  P2O5;Err 
##   2167.59    963.03   4717.98    191.70  28887.64    209.03    185.36     25.29 
##         S     S;Err        Cl    Cl;Err       K2O   K2O;Err       CaO   CaO;Err 
##    120.90     10.37     39.33      6.10    430.51      7.49  13047.17     27.20 
##        Ti    Ti;Err         V     V;Err        Cr    Cr;Err        Mn    Mn;Err 
##    333.17      4.22      1.29      1.55      0.48      1.08     29.77      2.37 
##        Fe    Fe;Err        Co    Co;Err        Ni    Ni;Err        Cu    Cu;Err 
##   1854.00     10.32      0.00      1.65      1.73      0.57      3.48      0.41 
##        Zn    Zn;Err        As    As;Err        Se    Se;Err        Rb    Rb;Err 
##      1.92      0.31      0.81      0.23      0.00      0.24      1.99      0.31 
##        Sr    Sr;Err         Y     Y;Err        Zr    Zr;Err        Nb    Nb;Err 
##     23.38      0.48      1.10      0.44     18.46      0.39      0.38      0.38 
##        Mo    Mo;Err        Rh    Rh;Err        Pd    Pd;Err        Ag    Ag;Err 
##      0.08      0.44      0.00      0.53      0.00      1.64      0.16      0.86 
##        Cd    Cd;Err        Sn    Sn;Err        Sb    Sb;Err        Ba    Ba;Err 
##      0.00      1.54      1.05      9.86      0.00      3.42     10.27     12.01 
##        La    La;Err        Ce    Ce;Err        Hf    Hf;Err        Ta    Ta;Err 
##      0.00     17.43      0.00      2.10      0.00      0.30      0.03      0.45 
##         W     W;Err        Pt    Pt;Err        Au    Au;Err        Hg    Hg;Err 
##      0.00      0.64      0.00      0.50      0.00      0.93      0.00      0.33 
##        Tl    Tl;Err        Pb    Pb;Err        Bi    Bi;Err        Th    Th;Err 
##      0.00      0.28      6.05      1.03      0.00      1.07      0.00      1.37 
##         U     U;Err 
##      0.00      3.40

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-As) %>% 
        select(-Pb) %>% 
        select(-MgO) %>% 
        select(-Cu) %>% 
        select(-Cr) %>% 
        select(-Nb) %>% 
        select(-Mo) %>% 
        select(-Ag) %>% 
        select(-Sn) %>% 
        select(-Ba) %>% 
        select(-Ta)

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V"  "Mn" "Fe" "Zn" "Rb" "Sr" "Y"  "Zr"

pXRF: K means cluster analysis

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:10], 4), data = pxrf_final, label = TRUE, label.size = 3)

pXRF: PCA

PCA with final elements:

# Elements
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V"  "Mn" "Fe" "Zn" "Rb" "Sr" "Y"  "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:10])
summary(pca_1)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6       PC7
## Standard deviation     2.2053 1.3764 0.85824 0.56195 0.40409 0.16327 9.454e-17
## Proportion of Variance 0.6079 0.2368 0.09207 0.03947 0.02041 0.00333 0.000e+00
## Cumulative Proportion  0.6079 0.8447 0.93678 0.97626 0.99667 1.00000 1.000e+00
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Ashdod-Yam Byzantine elements")

autoplot(pca_1, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Ashdod-Yam Byzantine grouped by sample type")

PCA(pxrf_final[3:10])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 7 individuals, described by 8 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

# HCA dendrogram, samples color coded by type:
dend <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    highlight_branches_col

    type <- as.factor(pxrf_final[, 2])
    n_type <- length(unique(type))
    cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l  = 50)
    col_type <- cols_t[type]
    labels_colors(dend) <- col_type[order.dendrogram(dend)]

    plot(dend, main="HCA with sample types (Copper included)")
    rect.dendrogram(dend, 5, border = "Black", lty = 5)
    legend("topright", legend = levels(type), fill = cols_t)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern);
library(reshape2)

# Read and filter data
grain <- read.xlsx("data/grain_AYB.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2) 

Line plots

# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample") 

ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line() +
    ggtitle("Grain size distribution curves")

# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample") 

ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'fill') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel); # Preventing labels from overlapping
library(reshape2) # Melt-function for reshaping data to long form

loi <- read.xlsx("data/loi_AYB.xlsx", sep=";")
tga <- read.xlsx("data/tga_AYB.xlsx", sep=";")

# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (mass after drying), "c550" (mass after 550 C), "c950" (mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

# Transforming data to long form 
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") 

tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") 

# Long form data for stacked barplots
loi_long2 <- melt(loi[c(1,13:14)], value.name = "Temp", id= "sample") 

tga_long2 <- melt(tga[c(1,10:11)], value.name = "Temp", id= "Name")

LOI dataset:

loi[c(8:9,13:14)]
##         context        type      c550      c950
## AY-43 Byzantine     ceiling 2.8231692 13.606389
## AY-44 Byzantine       floor 3.1549254  4.143032
## AY-45 Byzantine PEM (burnt) 9.7500231  6.336877
## AY-46 Byzantine         PEM 3.0153992  1.087679
## AY-47 Byzantine     ceiling 2.2356517  6.463715
## AY-48 Byzantine     ceiling 1.8207856 15.544905
## AY-49 Byzantine         PEM 0.4835757 32.404290

TGA dataset:

tga[c(8:9,10:11)]
##         context        type      c550      c950
## AY-43 Byzantine     ceiling 3.2957418 12.043832
## AY-44 Byzantine       floor 4.3270582  8.653940
## AY-45 Byzantine PEM (burnt) 6.2941554  9.781651
## AY-46 Byzantine         PEM 3.8585514  6.813924
## AY-47 Byzantine     ceiling 2.0075863  7.902190
## AY-48 Byzantine     ceiling 1.8409714 11.731844
## AY-49 Byzantine         PEM 0.3230349 29.724115

Boxplots

# Colored boxplots: Trad LOI
ggplot(loi_long, aes(x = variable, y = value, color = variable)) +  
  geom_boxplot() +
  labs(title="Traditional LOI",
        x ="Temperature", y = "LOI")

# Colored boxplots: TGA LOI
ggplot(tga_long, aes(x = variable, y = value, color = variable)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI",
        x ="Temperature", y = "LOI")

Biplots

# Color by context biplot: Trad LOI
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(type))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Traditional LOI - color by type", 
           color = "Type",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

# Color by context biplot: TGA
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(type))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "TGA LOI - color by type", 
           color = "Type",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

Stacked barplots

# Trad LOI stacked barplot
ggplot(loi_long2, aes(x = reorder(sample, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Traditional LOI", 
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

# TGA LOI stacked barplot
ggplot(tga_long2, aes(x = reorder(Name, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Thermogravimeter LOI",
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AYB.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a                b        
##  Min.   :34.56   Min.   : 2.480   Min.   : 6.98  
##  1st Qu.:38.93   1st Qu.: 5.215   1st Qu.:12.73  
##  Median :45.30   Median : 5.540   Median :13.28  
##  Mean   :46.69   Mean   : 6.211   Mean   :14.56  
##  3rd Qu.:50.95   3rd Qu.: 6.675   3rd Qu.:16.21  
##  Max.   :67.20   Max.   :11.680   Max.   :23.75
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Israel

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_israel.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[-c(1:3)] <- pxrf[-c(1:3)] %>% mutate_if(is.character,as.numeric)

# Transforming NA values to zeros
pxrf[is.na(pxrf)] <- 0

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$'Group.1'
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[-c(1:2)] <- scale(pxrf_scaled[-c(1:2)])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_all: All possible elements pxrf_errors: Means of available values and error values for a quick glimpse

colnames(is.na(pxrf_all[-c(1:2)]))
##  [1] "MgO"   "Al2O3" "SiO2"  "P2O5"  "S"     "Cl"    "K2O"   "CaO"   "Ti"   
## [10] "V"     "Cr"    "Mn"    "Fe"    "Ni"    "Cu"    "Zn"    "As"    "Se"   
## [19] "Rb"    "Sr"    "Y"     "Zr"    "Nb"    "Mo"    "Ag"    "Sn"    "Ba"   
## [28] "La"
pxrf_errors <- pxrf_averaged %>% select_if(all_na)
pxrf_errors <- colMeans(pxrf_errors[-c(1:2)])
pxrf_errors <- (pxrf_errors * 1000)
pxrf_errors <- round(pxrf_errors, digits = 2)
pxrf_errors
##       MgO   MgO;Err     Al2O3 Al2O3;Err      SiO2  SiO2;Err      P2O5  P2O5;Err 
##   4149.63    924.82   3704.58    194.18  17060.92    170.01    544.08     31.41 
##         S     S;Err        Cl    Cl;Err       K2O   K2O;Err       CaO   CaO;Err 
##    418.44     10.56    102.23      5.58    454.97      8.26  23503.67     35.60 
##        Ti    Ti;Err         V     V;Err        Cr    Cr;Err        Mn    Mn;Err 
##    243.83      4.17      2.08      1.71      0.04      0.98     23.96      2.14 
##        Fe    Fe;Err        Co    Co;Err        Ni    Ni;Err        Cu    Cu;Err 
##   1107.64      8.36      0.00      1.50      1.22      0.73      4.42      0.61 
##        Zn    Zn;Err        As    As;Err        Se    Se;Err        Rb    Rb;Err 
##      3.51      0.41      0.08      0.19      0.01      0.32      1.62      0.37 
##        Sr    Sr;Err         Y     Y;Err        Zr    Zr;Err        Nb    Nb;Err 
##     60.41      0.77      0.65      0.56     17.83      0.49      0.35      0.56 
##        Mo    Mo;Err        Rh    Rh;Err        Pd    Pd;Err        Ag    Ag;Err 
##      0.05      0.58      0.00      0.44      0.00      1.35      0.21      1.08 
##        Cd    Cd;Err        Sn    Sn;Err        Sb    Sb;Err        Ba    Ba;Err 
##      0.00      1.95      0.06      9.17      0.00      4.28     14.05     13.58 
##        La    La;Err        Ce    Ce;Err        Hf    Hf;Err        Ta    Ta;Err 
##      0.36     22.48      0.00      6.03      0.00      0.38      0.00      0.51 
##         W     W;Err        Pt    Pt;Err        Au    Au;Err        Hg    Hg;Err 
##      0.00      0.81      0.00      0.47      0.00      1.19      0.00      0.34 
##        Tl    Tl;Err        Pb    Pb;Err        Bi    Bi;Err        Th    Th;Err 
##      0.00      0.29      0.00      1.30      0.00      1.34      0.00      1.73 
##         U     U;Err 
##      0.00      4.24

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-Nb) %>% 
        select(-MgO) %>% 
        select(-Cu) %>% 
        select(-Cr) %>% 
        select(-As) %>% 
        select(-Se) %>% 
        select(-Mo) %>% 
        select(-Ag) %>% 
        select(-Sn) %>% 
        select(-Ba) %>% 
        select(-La)

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V"  "Mn" "Fe" "Zn" "Rb" "Sr" "Y"  "Zr"

pxrf_MB: Final analysis data set for MUDBRICK samples only (= no floor or lime plaster)

pxrf_MB <- pxrf_final[-c(1:7), ]
rownames(pxrf_MB)
##  [1] "AH-4"  "AH-5"  "RLZ-1" "TI-1"  "TI-10" "TI-2"  "TI-3"  "TI-4"  "TI-5" 
## [10] "TI-6"  "TI-7"  "TI-8"

pXRF: K means cluster analysis

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:10], 7), data = pxrf_final, label = TRUE, label.size = 3)

pXRF: PCA

PCA with final elements:

# Elements
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V"  "Mn" "Fe" "Zn" "Rb" "Sr" "Y"  "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:10])
summary(pca_1)
## Importance of components:
##                          PC1    PC2    PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.214 1.0368 0.9836 0.69250 0.60125 0.33515 0.28885
## Proportion of Variance 0.613 0.1344 0.1209 0.05994 0.04519 0.01404 0.01043
## Cumulative Proportion  0.613 0.7474 0.8683 0.92824 0.97343 0.98747 0.99790
##                           PC8
## Standard deviation     0.1298
## Proportion of Variance 0.0021
## Cumulative Proportion  1.0000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Israel elements")

autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Israel grouped by area")

autoplot(pca_1, data=pxrf_final, colour='Type', shape = FALSE, label = TRUE,  main = "PCA Israel grouped by sample type")

PCA(pxrf_final[3:10])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 19 individuals, described by 8 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"
# Only Tell Iztabba

pxrf_Iztabba <- pxrf_final[-c(1:10), ]
rownames(pxrf_Iztabba)
## [1] "TI-1"  "TI-10" "TI-2"  "TI-3"  "TI-4"  "TI-5"  "TI-6"  "TI-7"  "TI-8"
# PCA analysis
pca_2 <- prcomp(pxrf_Iztabba[3:9])
summary(pca_2)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.2369 1.1893 0.41691 0.31386 0.27805 0.08642 0.04209
## Proportion of Variance 0.7383 0.2087 0.02565 0.01453 0.01141 0.00110 0.00026
## Cumulative Proportion  0.7383 0.9470 0.97269 0.98723 0.99864 0.99974 1.00000
# PCA plots
biplot(pca_2, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Tell Iztabba elements")

autoplot(pca_2, data=pxrf_Iztabba, colour='Area', shape = FALSE, label.size = 4, label = TRUE,  main = "PCA Tell Iztabba")

PCA(pxrf_Iztabba[3:9])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 9 individuals, described by 7 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"
# PC autoplot with K-means clusters for Iztabba
set.seed(1)
autoplot(kmeans(pxrf_Iztabba[3:9], 4), data = pxrf_Iztabba, label = TRUE, label.size = 4.5)

PCA with only mudbrick samples:

# Elements
colnames(pxrf_MB[3:10])
## [1] "Ti" "V"  "Mn" "Fe" "Zn" "Rb" "Sr" "Y"
# PCA analysis
pca_3 <- prcomp(pxrf_MB[3:10])
summary(pca_3)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.5233 1.1089 0.77291 0.54054 0.29671 0.24740 0.13783
## Proportion of Variance 0.7355 0.1421 0.06901 0.03375 0.01017 0.00707 0.00219
## Cumulative Proportion  0.7355 0.8775 0.94651 0.98026 0.99043 0.99750 0.99969
##                            PC8
## Standard deviation     0.05140
## Proportion of Variance 0.00031
## Cumulative Proportion  1.00000
# PCA plots
biplot(pca_3, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Israel MB elements")

autoplot(pca_3, data=pxrf_MB, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Israel MB grouped by area")

PCA(pxrf_MB[3:10])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 12 individuals, described by 8 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_MB %>%                         # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    highlight_branches_col

    area <- as.factor(pxrf_MB[, 1])
    n_area <- length(unique(area))
    cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
    col_area <- cols_a[area]
    labels_colors(dend) <- col_area[order.dendrogram(dend)]

    plot(dend, main="HCA with sample areas")
    rect.dendrogram(dend, 4, border = "Black", lty = 5)
    legend("topright", legend = levels(area), fill = cols_a)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern);
library(reshape2)

# Read and filter data
grain <- read.xlsx("data/grain_israel.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2)

Line plots

# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample") 

ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line() +
    ggtitle("Grain size distribution curves")

# Tell Iztabba
grain_long2 <- grain[-c(1:3),] 
grain_long2 <- melt(grain_long2[c(1,14:113)], id = "Sample") 

ggplot(grain_long2, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line(size= 1.2) +
    ggtitle("Grain size distribution curves, Tell Iztabba")

# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample") 

ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'fill') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel); # Preventing labels from overlapping
library(reshape2) # Melt-function for reshaping data to long form

loi <- read.xlsx("data/loi_israel.xlsx", sep=";")
tga <- read.xlsx("data/tga_israel.xlsx", sep=";")

# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (mass after drying), "c550" (mass after 550 C), "c950" (mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

# Transforming data to long form 
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") 

tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") 

# Long form data for stacked barplots
loi_long2 <- melt(loi[c(1,13:14)], value.name = "Temp", id= "sample") 

tga_long2 <- melt(tga[c(1,10:11)], value.name = "Temp", id= "Name")

LOI dataset:

loi[c(8:9,13:14)]
##              context                  type     c550      c950
## AH-1        Ad Halom   lime plaster (fine) 3.636174 29.250009
## AH-2        Ad Halom                 floor 1.946490  9.981413
## AH-3        Ad Halom lime plaster (coarse) 4.192486 19.321298
## AH-4        Ad Halom              mudbrick 2.542779  4.414064
## AH-5        Ad Halom              mudbrick 1.471627  7.026742
## AH-6        Ad Halom         floor plaster 1.774556 40.504665
## AH-7        Ad Halom lime plaster (coarse) 4.941322 18.079922
## AH-8        Ad Halom                 floor 2.262738 15.868588
## AH-9        Ad Halom   lime plaster (fine) 1.674261 33.459065
## TI-1    Tell Iztabba              mudbrick 2.056555 42.038495
## TI-2    Tell Iztabba              mudbrick 1.847381 42.608634
## TI-3    Tell Iztabba              mudbrick 3.595561 29.149144
## TI-4    Tell Iztabba              mudbrick 4.892311 29.255047
## TI-5    Tell Iztabba              mudbrick 3.460312 27.795385
## TI-6    Tell Iztabba              mudbrick 3.169395 33.345746
## TI-7    Tell Iztabba              mudbrick 2.381649 40.992167
## TI-8    Tell Iztabba              mudbrick 3.088721 26.134740
## TI-10   Tell Iztabba              mudbrick 2.061447 41.299332
## RLZ-1 Rishon Le'Zion              mudbrick 2.364044  3.727290

TGA dataset:

tga[c(8:9,10:11)]
##              context     type     c550      c950
## AH-4        Ad Halom mudbrick 2.789774  4.486895
## AH-5        Ad Halom mudbrick 1.106870  4.457738
## RLZ-1 Rishon Le'Zion mudbrick 2.604865  4.169125
## TI-1    Tell Iztabba mudbrick 1.893905 42.031107
## TI-2    Tell Iztabba mudbrick 1.707268 42.691722
## TI-3    Tell Iztabba mudbrick 3.102582 30.945771
## TI-4    Tell Iztabba mudbrick 4.991016 30.111368
## TI-5    Tell Iztabba mudbrick 3.279413 28.194114
## TI-6    Tell Iztabba mudbrick 2.922239 34.000000
## TI-7    Tell Iztabba mudbrick 2.051722 41.455573
## TI-8    Tell Iztabba mudbrick 2.931780 29.661181
## TI-10   Tell Iztabba mudbrick 2.014504 41.949927

Boxplots

# Colored boxplots: Trad LOI
ggplot(loi_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Traditional LOI (Ad-Halom non-MB samples are included!)",
        x ="Temperature", y = "LOI")

# Colored boxplots: TGA LOI
ggplot(tga_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI (Only MB)",
        x ="Temperature", y = "LOI")

Biplots

# Color by context biplot: Trad LOI
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + 
      labs(title = "Traditional LOI - color by context", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

# Color by context biplot: TGA
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "TGA LOI - color by context", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

Stacked barplots

# Trad LOI stacked barplot
ggplot(loi_long2, aes(x = reorder(sample, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Traditional LOI", 
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

# TGA LOI stacked barplot
ggplot(tga_long2, aes(x = reorder(Name, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Thermogravimeter LOI",
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_israel.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a                b        
##  Min.   :43.75   Min.   : 3.160   Min.   :13.85  
##  1st Qu.:56.08   1st Qu.: 3.770   1st Qu.:14.71  
##  Median :64.30   Median : 4.350   Median :15.85  
##  Mean   :63.19   Mean   : 6.649   Mean   :17.27  
##  3rd Qu.:71.51   3rd Qu.: 6.280   3rd Qu.:17.70  
##  Max.   :75.34   Max.   :19.530   Max.   :25.84
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

Palaepaphos

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

MB_table <- matrix(c("LA54/4",  "30 cm x 18 cm", "Top row - weathered",
                  "LA54/4", "25 cm x 18 cm",    "Mid 4th row from the top",
                  "LA54/4", "35 cm x 18 cm (est.)", "B. 8th row from the top",
                  "LA54/4", "45 cm x 18 cm",    "9th row from the top",
                  "LA54/4", "46 cm x 16 cm",    "6th row from the top",
                  "LA59/2", "45 cm x 15 cm (est.)", "Top row - weathered",
                  "LA59/2", "45 cm x 15 cm (est.)", "3rd row from the top",
                  "LA59/2", "33 cm x 15 cm (est.)", "5th row below stones",
                  "LA59/2", "44 cm x 13 cm",    "9th row of the top",
                  "LA59/2", "40 cm x 20 cm",    "B. below disturbance under the marl",
                  "LA54/7", "14 cm x 15 cm (est.)", "(no additional info)"), 
                  ncol=3, byrow=TRUE)
colnames(MB_table) <- c("Context", "Size", "Row")
rownames(MB_table) <- c("PP-9", "PP-10", "PP-11", "PP-12", "PP-13", "PP-14", "PP-15", "PP-16", "PP-17", "PP-18", "PP-19")
MB_table <- as.table(MB_table)

library(knitr)
kable(MB_table)
Context Size Row
PP-9 LA54/4 30 cm x 18 cm Top row - weathered
PP-10 LA54/4 25 cm x 18 cm Mid 4th row from the top
PP-11 LA54/4 35 cm x 18 cm (est.) B. 8th row from the top
PP-12 LA54/4 45 cm x 18 cm 9th row from the top
PP-13 LA54/4 46 cm x 16 cm 6th row from the top
PP-14 LA59/2 45 cm x 15 cm (est.) Top row - weathered
PP-15 LA59/2 45 cm x 15 cm (est.) 3rd row from the top
PP-16 LA59/2 33 cm x 15 cm (est.) 5th row below stones
PP-17 LA59/2 44 cm x 13 cm 9th row of the top
PP-18 LA59/2 40 cm x 20 cm B. below disturbance under the marl
PP-19 LA54/7 14 cm x 15 cm (est.) (no additional info)

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # Pretty PCA plots
library(FactoMineR); # Fast automatic PCA graphs
library(factoextra) # Eigenvalues for PCA
library(dendextend); # Dendrograms
library(plot3D); # 3D plot for PCA results

# Read the raw data
pxrf <- read.xlsx("data/pxrf_PP.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[-c(1:3)] <- pxrf[-c(1:3)] %>% mutate_if(is.character,as.numeric)

# Transforming NA values to zeros
pxrf[is.na(pxrf)] <- 0

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$'Group.1'
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[-c(1:2)] <- scale(pxrf_scaled[-c(1:2)])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_all: All possible elements pxrf_errors: Means of available values and error values for a quick glimpse

colnames(is.na(pxrf_all[-c(1:2)]))
##  [1] "MgO"   "Al2O3" "SiO2"  "P2O5"  "S"     "Cl"    "K2O"   "CaO"   "Ti"   
## [10] "V"     "Cr"    "Mn"    "Fe"    "Ni"    "Cu"    "Zn"    "As"    "Rb"   
## [19] "Sr"    "Y"     "Zr"    "Nb"    "Mo"    "Rh"    "Ag"    "Ba"    "Ta"   
## [28] "Pb"
# Mean values and mean errors
pxrf_errors <- pxrf_averaged %>% select_if(all_na)
pxrf_errors <- colMeans(pxrf_errors[-c(1:2)])
pxrf_errors <- (pxrf_errors * 1000)
pxrf_errors <- round(pxrf_errors, digits = 2)

# Combining the individual measurement errors properly
pxrf_errors2 <- pxrf[,grepl("Err$", names(pxrf))]

### 
### errorfunc <- function(x){(1/length(x))*(sqrt(sum(x^2)))}
errorfunc <- function(x){(sqrt(sum(x^2)))}
pxrf_errors2<- aggregate(pxrf_errors2, by = list(pxrf$Sample), FUN = errorfunc)

rownames(pxrf_errors2) <- pxrf_errors2$'Group.1'
pxrf_errors2 <- pxrf_errors2 %>% select(-Group.1)

# Dataframe with correct combined errors for each sample
pxrf_errors2
##        MgO;Err Al2O3;Err  SiO2;Err   P2O5;Err      S;Err      Cl;Err    K2O;Err
## PP-10 1.601168 0.3255670 0.2932521 0.04862489 0.01989120 0.008616844 0.01409042
## PP-11 1.847949 0.3596388 0.3115961 0.05313765 0.02266385 0.010752209 0.01565918
## PP-12 1.256207 0.2599667 0.2245338 0.03929491 0.02075837 0.005185557 0.01187055
## PP-13 1.946213 0.3533752 0.3229759 0.05431620 0.01800306 0.010302427 0.01532677
## PP-14 1.828315 0.3663102 0.3134540 0.05319107 0.01909162 0.010418253 0.01484756
## PP-15 2.043850 0.3400062 0.2990354 0.04806173 0.01726644 0.009621850 0.01394023
## PP-16 2.205842 0.3400981 0.3037226 0.05113375 0.01846673 0.009538344 0.01445683
## PP-17 1.679541 0.3046607 0.2748861 0.04437060 0.01866467 0.008837986 0.01328684
## PP-18 1.687924 0.3336294 0.2614207 0.05326087 0.01843611 0.008593602 0.01435827
## PP-19 1.507273 0.3057109 0.2370499 0.04933660 0.01792819 0.007705842 0.01222538
## PP-9  2.122770 0.3516709 0.3110601 0.05144910 0.02172004 0.015384083 0.01406947
## S-1   1.539811 0.2894168 0.2616531 0.04289091 0.01795801 0.006718631 0.01231016
## S-2   2.529758 0.3198565 0.2846457 0.04420622 0.01663911 0.007645914 0.01299731
## S-3   1.472750 0.2921758 0.2620019 0.04243136 0.01726644 0.007609205 0.01312479
## S-4   1.967606 0.3326111 0.2943542 0.04747610 0.01868288 0.009143850 0.01443433
## S-5   2.717761 0.3153882 0.2909252 0.04425359 0.01668682 0.007749194 0.01341044
## S-6   2.261906 0.3381819 0.3213339 0.04234785 0.01651363 0.008720092 0.01426639
## S-7   3.389900 0.3473616 0.3174288 0.04483012 0.01634166 0.008651589 0.01478986
## S-8   2.212974 0.3367788 0.2935865 0.04775081 0.01631318 0.008803408 0.01391726
##          CaO;Err      Ti;Err       V;Err      Cr;Err      Mn;Err     Fe;Err
## PP-10 0.05642579 0.006759438 0.002887906 0.001849324 0.004446347 0.02069541
## PP-11 0.06275006 0.007453187 0.003234192 0.002022375 0.004803124 0.02255704
## PP-12 0.04450528 0.005543465 0.002491987 0.001459452 0.003038092 0.01321590
## PP-13 0.06060677 0.006878227 0.002947881 0.001849324 0.004596738 0.02106395
## PP-14 0.06393567 0.007335530 0.003117691 0.001849324 0.004518849 0.02178118
## PP-15 0.05783563 0.006485368 0.002774887 0.001737815 0.004001250 0.01931036
## PP-16 0.06018663 0.006765353 0.002944486 0.001791647 0.004518849 0.02176672
## PP-17 0.05023515 0.006236185 0.002785678 0.001700000 0.003837968 0.01810249
## PP-18 0.06711915 0.006894200 0.003127299 0.001797220 0.003828838 0.01825870
## PP-19 0.06210000 0.005906776 0.002773085 0.001615549 0.003165438 0.01489530
## PP-9  0.05786545 0.006827152 0.003019934 0.001941649 0.004742362 0.02247487
## S-1   0.05030189 0.005852350 0.002601922 0.001630951 0.003556684 0.01539935
## S-2   0.05229943 0.005838664 0.002483948 0.001618641 0.004743416 0.02101547
## S-3   0.04973349 0.005897457 0.002624881 0.001603122 0.003925557 0.01790475
## S-4   0.05483074 0.007278049 0.003446738 0.002111871 0.004636809 0.02110237
## S-5   0.05220824 0.005900847 0.002541653 0.001676305 0.004855924 0.02146742
## S-6   0.04863723 0.006240192 0.002541653 0.001676305 0.004851804 0.02149558
## S-7   0.05053207 0.006700746 0.002714774 0.001910497 0.005316014 0.02386022
## S-8   0.05786761 0.006410148 0.002714774 0.001802776 0.004446347 0.01951922
##            Co;Err      Ni;Err       Cu;Err       Zn;Err       As;Err
## PP-10 0.004042277 0.001445683 0.0012369317 0.0006928203 0.0003464102
## PP-11 0.003871692 0.001039230 0.0009273618 0.0005196152 0.0003464102
## PP-12 0.003246537 0.001813836 0.0010392305 0.0009380832 0.0003464102
## PP-13 0.003522783 0.001330413 0.0012727922 0.0007549834 0.0003464102
## PP-14 0.003698648 0.001212436 0.0011000000 0.0005830952 0.0003464102
## PP-15 0.003525621 0.001655295 0.0012884099 0.0008246211 0.0004123106
## PP-16 0.003813135 0.001445683 0.0012369317 0.0007549834 0.0004123106
## PP-17 0.003706751 0.001486607 0.0009949874 0.0007810250 0.0004123106
## PP-18 0.003525621 0.001157584 0.0010392305 0.0005830952 0.0003464102
## PP-19 0.002758623 0.001640122 0.0011000000 0.0007810250 0.0003000000
## PP-9  0.003926831 0.001220656 0.0009949874 0.0006403124 0.0003464102
## S-1   0.003410279 0.001565248 0.0010392305 0.0008124038 0.0003464102
## S-2   0.004273172 0.001272792 0.0008660254 0.0005830952 0.0003464102
## S-3   0.003648287 0.001688194 0.0011000000 0.0008774964 0.0003464102
## S-4   0.004272002 0.001157584 0.0008774964 0.0006403124 0.0003000000
## S-5   0.004330127 0.001157584 0.0008124038 0.0005830952 0.0003000000
## S-6   0.003986226 0.001220656 0.0008660254 0.0006403124 0.0003464102
## S-7   0.004505552 0.001212436 0.0008660254 0.0006928203 0.0003464102
## S-8   0.003645545 0.001330413 0.0010392305 0.0006928203 0.0003464102
##             Se;Err       Rb;Err       Sr;Err        Y;Err       Zr;Err
## PP-10 0.0005196152 0.0006403124 0.0011000000 0.0008660254 0.0006928203
## PP-11 0.0004690416 0.0005196152 0.0008660254 0.0007549834 0.0005196152
## PP-12 0.0007071068 0.0008774964 0.0013453624 0.0012806248 0.0009380832
## PP-13 0.0005196152 0.0005830952 0.0011575837 0.0009273618 0.0006928203
## PP-14 0.0005196152 0.0005196152 0.0009848858 0.0008660254 0.0006928203
## PP-15 0.0006557439 0.0007810250 0.0012884099 0.0011180340 0.0008774964
## PP-16 0.0005830952 0.0006928203 0.0011000000 0.0009273618 0.0007549834
## PP-17 0.0005830952 0.0007071068 0.0011180340 0.0010770330 0.0007810250
## PP-18 0.0005196152 0.0005830952 0.0008660254 0.0009273618 0.0006403124
## PP-19 0.0006164414 0.0007810250 0.0013190906 0.0011532563 0.0009110434
## PP-9  0.0004690416 0.0005830952 0.0011000000 0.0008774964 0.0006928203
## S-1   0.0006403124 0.0007549834 0.0012206556 0.0010488088 0.0008124038
## S-2   0.0004123106 0.0005196152 0.0009273618 0.0007549834 0.0006928203
## S-3   0.0006403124 0.0007681146 0.0012806248 0.0011090537 0.0008124038
## S-4   0.0005196152 0.0005196152 0.0009848858 0.0008660254 0.0006403124
## S-5   0.0003464102 0.0005196152 0.0009848858 0.0007549834 0.0006403124
## S-6   0.0004123106 0.0005196152 0.0008124038 0.0008124038 0.0006403124
## S-7   0.0003464102 0.0005196152 0.0008660254 0.0008124038 0.0006928203
## S-8   0.0005196152 0.0005830952 0.0009273618 0.0008660254 0.0006928203
##             Nb;Err       Mo;Err       Rh;Err      Pd;Err      Ag;Err
## PP-10 0.0009848858 0.0008660254 0.0010392305 0.002368544 0.001737815
## PP-11 0.0007549834 0.0006928203 0.0009219544 0.002714774 0.001385641
## PP-12 0.0012369317 0.0013304135 0.0008774964 0.001920937 0.002588436
## PP-13 0.0007549834 0.0009949874 0.0007810250 0.002549510 0.001749286
## PP-14 0.0007549834 0.0008660254 0.0010392305 0.002541653 0.001737815
## PP-15 0.0011445523 0.0011916375 0.0009848858 0.002269361 0.002193171
## PP-16 0.0009848858 0.0009848858 0.0010392305 0.002310844 0.001732051
## PP-17 0.0011532563 0.0011532563 0.0010488088 0.002336664 0.002080865
## PP-18 0.0009848858 0.0009949874 0.0010392305 0.002256103 0.001749286
## PP-19 0.0014966630 0.0011532563 0.0008774964 0.002066398 0.002515949
## PP-9  0.0009486833 0.0008124038 0.0006000000 0.002565151 0.001565248
## S-1   0.0011180340 0.0011180340 0.0007071068 0.002097618 0.002032240
## S-2   0.0007549834 0.0008246211 0.0010630146 0.002126029 0.001682260
## S-3   0.0012369317 0.0012369317 0.0009273618 0.002061553 0.002174856
## S-4   0.0008660254 0.0008660254 0.0009219544 0.002491987 0.001503330
## S-5   0.0007549834 0.0007549834 0.0013856406 0.002334524 0.001618641
## S-6   0.0007549834 0.0008124038 0.0007000000 0.002833725 0.001676305
## S-7   0.0006928203 0.0008660254 0.0013856406 0.001700000 0.001618641
## S-8   0.0009273618 0.0008660254 0.0009219544 0.002601922 0.001849324
##            Cd;Err     Sn;Err      Sb;Err     Ba;Err     La;Err     Ce;Err
## PP-10 0.003061046 0.01709035 0.007739509 0.02211674 0.03835127 0.00000000
## PP-11 0.002598076 0.01819368 0.006524569 0.01894202 0.03605371 0.00930000
## PP-12 0.005289612 0.01600531 0.008748143 0.03107748 0.05699009 0.00830000
## PP-13 0.003261901 0.01622683 0.006488451 0.02126546 0.03486976 0.01210165
## PP-14 0.003371943 0.01651787 0.007222880 0.02264045 0.04040644 0.00000000
## PP-15 0.004411349 0.01520066 0.008964932 0.02834960 0.04300605 0.00720000
## PP-16 0.003117691 0.01610248 0.007060453 0.02388598 0.03235522 0.01106345
## PP-17 0.004173727 0.01701000 0.008235290 0.02776040 0.04912952 0.00880000
## PP-18 0.002947881 0.01772597 0.006164414 0.02467934 0.03748146 0.01605677
## PP-19 0.005014978 0.01613227 0.008381527 0.03003964 0.05031680 0.01310611
## PP-9  0.002785678 0.01753710 0.007079548 0.02040466 0.02994311 0.01525582
## S-1   0.003656501 0.01642985 0.008542833 0.02708062 0.05036556 0.01209339
## S-2   0.003009983 0.02138831 0.007692204 0.02275895 0.04427561 0.01374191
## S-3   0.003733631 0.01599344 0.009433981 0.02679963 0.04624770 0.01197539
## S-4   0.003184337 0.01977777 0.006472248 0.02203134 0.03748853 0.00840000
## S-5   0.002944486 0.02293927 0.005312250 0.02166772 0.03187193 0.01362718
## S-6   0.002958040 0.01911675 0.007020684 0.02247087 0.03578058 0.00760000
## S-7   0.002944486 0.02235285 0.007563729 0.02263316 0.04144997 0.01409326
## S-8   0.003234192 0.01896734 0.008376754 0.02504496 0.04288251 0.01161034
##             Hf;Err       Ta;Err       W;Err       Pt;Err      Au;Err
## PP-10 0.0007549834 0.0009000000 0.001385641 0.0008660254 0.001964688
## PP-11 0.0005196152 0.0010392305 0.001157584 0.0008660254 0.001558846
## PP-12 0.0008774964 0.0016431677 0.001881489 0.0007549834 0.002690725
## PP-13 0.0006928203 0.0008246211 0.001272792 0.0008124038 0.001854724
## PP-14 0.0005830952 0.0007810250 0.001272792 0.0008660254 0.001791647
## PP-15 0.0008246211 0.0013152946 0.001603122 0.0007549834 0.002435159
## PP-16 0.0007549834 0.0012727922 0.001337909 0.0008660254 0.002083267
## PP-17 0.0007810250 0.0010295630 0.001584298 0.0007549834 0.002193171
## PP-18 0.0005830952 0.0009380832 0.001220656 0.0008124038 0.001910497
## PP-19 0.0007810250 0.0009949874 0.001805547 0.0007071068 0.002395830
## PP-9  0.0006403124 0.0007071068 0.001220656 0.0009273618 0.001802776
## S-1   0.0007549834 0.0012688578 0.001516575 0.0007549834 0.002269361
## S-2   0.0005830952 0.0010392305 0.001157584 0.0008660254 0.001618641
## S-3   0.0008124038 0.0008124038 0.001577973 0.0007071068 0.002345208
## S-4   0.0005830952 0.0009000000 0.001157584 0.0009273618 0.001732051
## S-5   0.0005196152 0.0007681146 0.001039230 0.0009273618 0.001558846
## S-6   0.0006403124 0.0009848858 0.001100000 0.0008660254 0.001618641
## S-7   0.0005196152 0.0009848858 0.001039230 0.0009848858 0.001558846
## S-8   0.0005830952 0.0010049876 0.001280625 0.0008660254 0.001849324
##             Hg;Err       Tl;Err      Pb;Err      Bi;Err      Th;Err       U;Err
## PP-10 0.0005196152 0.0004690416 0.002022375 0.002195450 0.002887906 0.006929646
## PP-11 0.0007071068 0.0006403124 0.001676305 0.001849324 0.002424871 0.005947268
## PP-12 0.0003316625 0.0002449490 0.003029851 0.003093542 0.003844477 0.009604686
## PP-13 0.0007071068 0.0005385165 0.002027313 0.002137756 0.002718455 0.006827884
## PP-14 0.0006928203 0.0005830952 0.001905256 0.002083267 0.002657066 0.006240192
## PP-15 0.0005385165 0.0004690416 0.002603843 0.002731300 0.003354102 0.008479387
## PP-16 0.0005830952 0.0005385165 0.002200000 0.002437212 0.002954657 0.007346428
## PP-17 0.0004582576 0.0003741657 0.002489980 0.002603843 0.003238827 0.008053571
## PP-18 0.0006403124 0.0004690416 0.002083267 0.002147091 0.002782086 0.006891299
## PP-19 0.0005477226 0.0004582576 0.002844293 0.002880972 0.003661967 0.009124144
## PP-9  0.0007000000 0.0006164414 0.001926136 0.001974842 0.002613427 0.006562774
## S-1   0.0003741657 0.0003000000 0.002441311 0.002613427 0.003303029 0.008292768
## S-2   0.0005196152 0.0004690416 0.001732051 0.001791647 0.002483948 0.006240192
## S-3   0.0004242641 0.0004242641 0.002580698 0.002561250 0.003327161 0.008466995
## S-4   0.0005830952 0.0004898979 0.001732051 0.001905256 0.002428992 0.006126989
## S-5   0.0005196152 0.0004690416 0.001618641 0.001732051 0.002368544 0.006120457
## S-6   0.0004690416 0.0004690416 0.001676305 0.001849324 0.002487971 0.006360818
## S-7   0.0006403124 0.0005196152 0.001558846 0.001791647 0.002424871 0.005948949
## S-8   0.0005196152 0.0004690416 0.001969772 0.002083267 0.002714774 0.006929646
# Replacing mean errors with calculated errors
pxrf_averaged[names(pxrf_errors2)] <- pxrf_errors2

# Printing table with errors for selected elements
pxrf_table <- pxrf_averaged[c(19:22,25:28,35:36,41:48)]
pxrf_table <-  (pxrf_table * 1000)
pxrf_table <- round(pxrf_table, digits = 2)
kable(pxrf_table)
Ti Ti;Err V V;Err Mn Mn;Err Fe Fe;Err Zn Zn;Err Rb Rb;Err Sr Sr;Err Y Y;Err Zr Zr;Err
PP-10 195.43 6.76 5.87 2.89 38.97 4.45 2213.53 20.70 3.73 0.69 1.47 0.64 39.00 1.10 1.40 0.87 8.07 0.69
PP-11 229.47 7.45 5.60 3.23 38.53 4.80 2386.63 22.56 2.53 0.52 1.60 0.52 29.03 0.87 1.13 0.75 7.47 0.52
PP-12 126.27 5.54 2.13 2.49 17.80 3.04 1005.83 13.22 3.23 0.94 1.83 0.88 28.30 1.35 0.50 1.28 8.87 0.94
PP-13 192.83 6.88 4.97 2.95 38.93 4.60 2131.17 21.06 5.00 0.75 2.07 0.58 48.40 1.16 1.47 0.93 10.17 0.69
PP-9 191.50 7.34 4.43 3.12 41.77 4.52 2448.13 21.78 3.87 0.58 2.13 0.52 46.83 0.98 1.47 0.87 11.83 0.69
PP-19 96.90 6.49 0.87 2.77 11.90 4.00 961.70 19.31 1.67 0.82 0.90 0.78 31.80 1.29 0.27 1.12 6.10 0.88
PP-14 218.77 6.77 5.73 2.94 33.47 4.52 2224.07 21.77 2.40 0.75 1.67 0.69 35.07 1.10 1.37 0.93 8.80 0.75
PP-15 182.10 6.24 3.97 2.79 27.10 3.84 1878.00 18.10 3.20 0.78 1.93 0.71 35.27 1.12 1.60 1.08 9.33 0.78
PP-16 192.17 6.89 4.53 3.13 39.77 3.83 2300.33 18.26 3.93 0.58 1.70 0.58 37.70 0.87 1.73 0.93 8.73 0.64
PP-17 170.90 5.91 3.57 2.77 28.60 3.17 1762.37 14.90 3.37 0.78 1.90 0.78 28.83 1.32 1.33 1.15 8.67 0.91
PP-18 161.77 6.83 2.67 3.02 20.83 4.74 1491.57 22.47 1.93 0.64 1.20 0.58 17.90 1.10 0.87 0.88 5.40 0.69
S-1 136.83 5.85 2.30 2.60 24.73 3.56 1295.87 15.40 3.40 0.81 2.00 0.75 33.93 1.22 1.37 1.05 9.17 0.81
S-2 155.77 5.84 2.93 2.48 44.03 4.74 2338.23 21.02 3.23 0.58 2.13 0.52 34.03 0.93 1.73 0.75 9.63 0.69
S-3 143.07 5.90 3.17 2.62 31.73 3.93 1677.03 17.90 4.47 0.88 2.13 0.77 40.00 1.28 1.60 1.11 9.67 0.81
S-4 181.07 7.28 1.33 3.45 42.73 4.64 2308.13 21.10 3.77 0.64 2.10 0.52 35.93 0.98 1.63 0.87 8.73 0.64
S-5 155.43 5.90 3.40 2.54 46.97 4.86 2435.07 21.47 3.70 0.58 2.33 0.52 40.87 0.98 1.80 0.75 10.40 0.64
S-6 203.23 6.24 3.77 2.54 52.17 4.85 2555.17 21.50 3.07 0.64 2.87 0.52 23.97 0.81 2.20 0.81 9.47 0.64
S-7 237.07 6.70 1.13 2.71 56.33 5.32 3061.00 23.86 4.03 0.69 3.57 0.52 31.03 0.87 2.60 0.81 12.20 0.69
S-8 175.37 6.41 1.50 2.71 36.27 4.45 1912.43 19.52 3.10 0.69 2.17 0.58 28.43 0.93 2.10 0.87 7.83 0.69

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-As) %>% 
        select(-Rh) %>% 
        select(-Ag) %>% 
        select(-MgO) %>% 
        select(-Cu) %>% 
        select(-Cr) %>% 
        select(-Nb) %>% 
        select(-Mo) %>% 
        select(-Ba) %>% 
        select(-Ta) %>% 
        select(-Pb)

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V"  "Mn" "Fe" "Zn" "Rb" "Sr" "Y"  "Zr"

pxrf_MB: Final analysis data set for MUDBRICK samples only (= no soil)

pxrf_MB <- pxrf_final[c(1:11), ]

pXRF: K means cluster analysis

Only mudbrick samples, K-means cluster analysis with 5 possible clusters

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_MB[3:10], 4), data = pxrf_MB, label = TRUE, label.size = 3)

pXRF: PCA

PCA with only mudbrick samples:

# PCA
PCA_01 <- PCA(pxrf_MB[3:10], graph = FALSE)

# Eigenvalues
get_eigenvalue(PCA_01)
##         eigenvalue variance.percent cumulative.variance.percent
## Dim.1 5.7466885592     71.833606990                    71.83361
## Dim.2 1.3033985049     16.292481311                    88.12609
## Dim.3 0.4756033157      5.945041447                    94.07113
## Dim.4 0.2068036695      2.585045869                    96.65618
## Dim.5 0.1671510456      2.089388071                    98.74556
## Dim.6 0.0875290547      1.094113184                    99.83968
## Dim.7 0.0123359358      0.154199198                    99.99388
## Dim.8 0.0004899144      0.006123931                   100.00000
fviz_eig(PCA_01)

# Contributions of variables to PC1 and PC2
fviz_contrib(PCA_01, choice = "var", axes = 1, top = 10)

fviz_contrib(PCA_01, choice = "var", axes = 2, top = 10)

# Graph of variables with contribution indication
fviz_pca_var(PCA_01, col.var = "contrib",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), 
             repel = TRUE # Avoid text overlapping
             )

# Graph of individuals with contribution indication
fviz_pca_ind(PCA_01,
             col.ind = pxrf_MB$Area, # color by groups
             palette = c("#00AFBB", "#E7B800", "#FC4E07"),
             legend.title = "Context")

#3D plot
scores = as.data.frame(PCA_01$ind)
scatter3D(scores$'coord.Dim.2', scores$'coord.Dim.3', scores$'coord.Dim.1', 
          xlab = "PC2 (16,29 %)", ylab = "PC3 (5,95 %)", zlab="PC1 (71,83 %)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
text3D(scores$'coord.Dim.2', scores$'coord.Dim.3', scores$'coord.Dim.1', labels = rownames(scores),
          add = TRUE, colkey = FALSE, cex = 0.7)

PCA with soil samples:

# PCA
PCA_02 <- PCA(pxrf_final[3:10], graph = FALSE)

# Eigenvalues
get_eigenvalue(PCA_02)
##       eigenvalue variance.percent cumulative.variance.percent
## Dim.1 4.45474013       55.6842516                    55.68425
## Dim.2 1.55511307       19.4389133                    75.12316
## Dim.3 1.31381319       16.4226649                    91.54583
## Dim.4 0.28293302        3.5366628                    95.08249
## Dim.5 0.16050235        2.0062794                    97.08877
## Dim.6 0.11809153        1.4761442                    98.56492
## Dim.7 0.10441527        1.3051909                    99.87011
## Dim.8 0.01039143        0.1298929                   100.00000
fviz_eig(PCA_02)

# Contributions of variables to PC1 and PC2
fviz_contrib(PCA_02, choice = "var", axes = 1, top = 10)

fviz_contrib(PCA_02, choice = "var", axes = 2, top = 10)

fviz_contrib(PCA_02, choice = "var", axes = 3, top = 10)

# Graph of variables with contribution indication
fviz_pca_var(PCA_02, col.var = "contrib",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"), 
             repel = TRUE # Avoid text overlapping
             )

# Graph of individuals
fviz_pca_ind(PCA_02,
             col.ind = pxrf_final$Area,
             legend.title = "Context")

# Graph of individuals
fviz_pca_ind(PCA_02,
             col.ind = pxrf_final$Type, # color by groups
             legend.title = "Type")

#3D plot
scores = as.data.frame(PCA_02$ind)
scatter3D(scores$'coord.Dim.2', scores$'coord.Dim.3', scores$'coord.Dim.1', 
          xlab = "PC2 (19,44 %)", ylab = "PC3 (16,42 %)", zlab="PC1 (55,68 %)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
text3D(scores$'coord.Dim.2', scores$'coord.Dim.3', scores$'coord.Dim.1', labels = rownames(scores),
          add = TRUE, colkey = FALSE, cex = 0.7)

pXRF: HCA

HCA including only MB samples, colored by area:

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_MB %>%                         # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    highlight_branches_col

    area <- as.factor(pxrf_MB[, 1])
    n_area <- length(unique(area))
    cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
    col_area <- cols_a[area]
    labels_colors(dend) <- col_area[order.dendrogram(dend)]

    plot(dend, main="HCA with sample areas")
    rect.dendrogram(dend, 6, border = "Grey", lty = 5)
    legend("topright", legend = levels(area), fill = cols_a)

# PCA biplot with HCA groups
    
# Compute PCA with ncp = 3
res.pca <- PCA(pxrf_MB[3:10], ncp = 3, graph = FALSE)
# Compute hierarchical clustering on principal components
res.hcpc <- HCPC(res.pca, graph = FALSE)
    
fviz_dend(res.hcpc, 
          cex = 0.7,                     # Label size
          palette = "jco",               # Color palette see ?ggpubr::ggpar
          rect = TRUE, rect_fill = TRUE, # Add rectangle around groups
          rect_border = "jco",           # Rectangle color
          labels_track_height = 0.8      # Augment the room for labels
          )

  fviz_cluster(res.hcpc,
             repel = TRUE,            # Avoid label overlapping
             show.clust.cent = TRUE, # Show cluster centers
             palette = "jco",         # Color palette see ?ggpubr::ggpar
             ggtheme = theme_minimal(),
             main = "Factor map"
             )

HCA with soil samples, colored by type:

# HCA dendrogram, samples color coded by type:
dend3 <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    highlight_branches_col

    type <- as.factor(pxrf_final[, 2])
    n_type <- length(unique(type))
    cols_t <- colorspace::rainbow_hcl(n_type, c = 70, l  = 50)
    col_type <- cols_t[type]
    labels_colors(dend3) <- col_type[order.dendrogram(dend3)]

    plot(dend3, main="HCA with all samples by type")
    rect.dendrogram(dend3, 2, border = "Grey", lty = 5)
    legend("topright", legend = levels(type), fill = cols_t)

    # PCA biplot with HCA groups
    
# Compute PCA with ncp = 3
res.pca <- PCA(pxrf_final[3:10], ncp = 5, graph = FALSE)
# Compute hierarchical clustering on principal components
res.hcpc <- HCPC(res.pca, graph = FALSE)
    
fviz_dend(res.hcpc, 
          cex = 0.7,                     # Label size
          palette = "jco",               # Color palette see ?ggpubr::ggpar
          rect = TRUE, rect_fill = TRUE, # Add rectangle around groups
          rect_border = "jco",           # Rectangle color
          labels_track_height = 0.8      # Augment the room for labels
          )

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern);
library(reshape2)

# Read and filter data
grain <- read.xlsx("data/grain_PP.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))

library(knitr)
grain_simple <- round(grain[5:7], digits = 2)
kable(grain_simple)
Clay Silt Sand
PP-10 0.85 21.27 77.88
PP-11 1.30 40.96 57.74
PP-12 11.55 37.30 51.15
PP-13 2.57 45.90 51.53
PP-9 1.73 46.27 51.99
PP-19 1.72 39.35 58.93
PP-14 0.94 56.42 42.63
PP-15 9.72 36.47 53.81
PP-16 0.92 36.94 62.14
PP-17 1.45 43.77 54.78
PP-18 1.11 65.68 33.20
S-1 1.02 30.69 68.29
S-2 10.66 75.17 14.17
S-3 0.71 19.69 79.60
S-4 3.55 86.67 9.78
S-5 0.57 6.12 93.31
S-6 4.18 75.65 20.18
S-7 13.74 30.01 56.26
S-8 11.91 26.96 61.13

Ternary plots

# Creating subsets for only mudbricks
MB_grain <- grain_01[c(1:11),]
MB_context <- grain[c(1:11),]

# Ternary plots
ggtern(data=MB_grain, aes(x=Clay, y=Sand, z=Silt, color=MB_context$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(MB_grain)), size=3)

#Scaled
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand",
       color="Type",
       shape="Context") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2, aes(shape = factor(grain$Context))) + 
  geom_text(aes(label=rownames(grain_01)), size=3, hjust=1.2, vjust=1) + 
  theme_showarrows()

# Unscaled
ggtern(data=grain[5:7], aes(x=Clay, y=Sand, z=Silt, color=grain$Type)) +
  labs(title="Clay silt sand unscaled",
       color="Type",
       shape="Context") +
  theme_nomask() +
  geom_mask() +
  geom_point(size=2, aes(shape = factor(grain$Context))) + 
  geom_text(aes(label=rownames(grain_01)), size=3, hjust=1.2, vjust=1) + 
  theme_showarrows()

Line plots

# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample") 

ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line() +
    ggtitle("Grain size distribution curves")

# Only MB samples
grain_MB2 <- grain[c(1:11),]
grain_long3 <- melt(grain_MB2[c(1,14:113)], id = "Sample") 

ggplot(grain_long3, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line() +
    ggtitle("Grain size distribution curves - MB only")

Barplots for 5 size fractions:

# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample") 

ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    ggtitle("Grain size distribution by fractions")

# Stacked barplot
grain_long4 <- melt(grain_MB2[c(1,9:13)], id = "Sample") 

ggplot(grain_long4, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    ggtitle("Grain size distribution by fractions - MB only")

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel); # Preventing labels from overlapping
library(reshape2) # Melt-function for reshaping data to long form

loi <- read.xlsx("data/loi_PP.xlsx", sep=";")
tga <- read.xlsx("data/tga_PP.xlsx", sep=";")

# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (mass after drying), "c550" (mass after 550 C), "c950" (mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

# Creating MB only subsets
loi_MB <- subset(loi[9:19, ])

tga_MB <- subset(tga[9:20, ])
tga_MB <- subset(tga_MB[-4, ]) # No duplicates

tga_MB1 <- subset(tga[9:22, ]) # Includes some duplicates

# Transforming data to long form (MB only)
loi_MB_long <- loi_MB[c(8,13:14)] 
loi_MB_long <- melt(loi_MB_long, id = "context") 

tga_MB_long <- tga_MB1[c(8,10:11)] 
tga_MB_long <- melt(tga_MB_long, id = "context") 

# Transforming data to long form (Soil included)
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") 

tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") 

# Long form data for stacked barplots
loi_long2 <- melt(loi_MB[c(1,13:14)], value.name = "Temp", id= "sample") 

tga_long2 <- melt(tga_MB[c(1,10:11)], value.name = "Temp", id= "Name") 

LOI dataset:

loi[c(8:9,13:14)]
##                context     type     c550     c950
## S-1   Hadjuabudllah l1     soil 5.986894 19.67880
## S-2   Hadjuabudllah l2     soil 4.466405 17.59870
## S-3      Laona soil l1     soil 5.734589 21.55943
## S-4      Laona soil l2     soil 4.323061 17.76702
## S-5      Laona soil l3     soil 6.877261 19.61697
## S-6      Laona soil l1     soil 5.038672 17.28507
## S-7      Laona soil l2     soil 5.582797 17.71488
## S-8      Laona soil l3     soil 5.134295 19.63221
## PP-9            LA54:4 mudbrick 7.451505 18.35791
## PP-10           LA54:4 mudbrick 3.557623 14.55842
## PP-11           LA54:4 mudbrick 4.002255 21.24757
## PP-12           LA54:4 mudbrick 2.797378 22.23702
## PP-13           LA54:4 mudbrick 3.156025 21.81049
## PP-14           LA59:2 mudbrick 2.818483 22.65698
## PP-15           LA59:2 mudbrick 3.822946 25.15246
## PP-16           LA59:2 mudbrick 2.365256 21.25664
## PP-17           LA59:2 mudbrick 3.860836 20.58203
## PP-18           LA59:2 mudbrick 2.451871 33.21076
## PP-19           LA54:7 mudbrick 1.739797 33.78112
## S-1_2 Hadjuabudllah l1     soil 6.137732 21.18259
## S-2_2 Hadjuabudllah l2     soil 4.429557 18.22193
## S-3_2    Laona soil l1     soil 5.521160 22.61670
## S-4_2    Laona soil l2     soil 4.582459 20.95611
## S-5_2    Laona soil l3     soil 6.888629 20.42575
## S-6_2    Laona soil l1     soil 5.348806 16.70897
## S-7_2    Laona soil l2     soil 5.537857 18.92266
## S-8_2    Laona soil l3     soil 5.230428 19.99844

TGA dataset:

tga[c(8:9,10:11)]
##           context     type     c550     c950
## S-1          soil     soil 5.328983 22.05629
## S-2          soil     soil 3.798409 18.86880
## S-3          soil     soil 4.452588 21.57446
## S-4          soil     soil 4.537622 20.07908
## S-5          soil     soil 4.657919 23.10036
## S-6          soil     soil 4.892221 16.89988
## S-7          soil     soil 4.601116 18.47942
## S-8          soil     soil 4.525488 20.99294
## PP-9       LA54:4 mudbrick 5.392111 18.62861
## PP-10      LA54:4 mudbrick 2.360845 15.24474
## PP-11      LA54:4 mudbrick 2.773246 24.52314
## PP-11 (2)  LA54:4 mudbrick 3.164817 20.66002
## PP-12      LA54:4 mudbrick 2.734021 21.45383
## PP-13      LA54:4 mudbrick 3.235943 21.34319
## PP-14      LA59:2 mudbrick 3.202329 22.49060
## PP-15      LA59:2 mudbrick 3.508931 22.92802
## PP-16      LA59:2 mudbrick 2.215719 19.75916
## PP-17      LA59:2 mudbrick 3.921377 22.16794
## PP-18      LA59:2 mudbrick 3.234501 31.05334
## PP-19      LA54:7 mudbrick 2.204428 33.79251
## PP-10 (2)  LA54:4 mudbrick 2.226568 14.54839
## PP-9 (2)   LA54:4 mudbrick 5.258741 19.01388
## S-8 (2)      soil     soil 4.474886 22.50903
## S-7 (2)      soil     soil 4.739991 19.00483
## S-6 (2)      soil     soil 4.618851 18.27756
## S-5 (2)      soil     soil 5.122344 22.59504
## S-4 (2)      soil     soil 4.559860 20.33749
## S-3 (2)      soil     soil 4.302926 20.35644

Boxplots

# Colored boxplots: Trad LOI, MB only
ggplot(loi_MB_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Traditional LOI (MB only)",
        x ="Temperature", y = "LOI")

# Colored boxplots: TGA LOI, MB only
ggplot(tga_MB_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI (MB only)",
        x ="Temperature", y = "LOI")

# Colored boxplots: Trad LOI, soil samples included
ggplot(loi_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Traditional LOI (soil included)",
        x ="Temperature", y = "LOI")

# Colored boxplots: TGA LOI, soil samples included
ggplot(tga_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI (soil included)",
        x ="Temperature", y = "LOI")

Biplots

# MB only biplot: Trad LOI
ggplot(loi_MB, 
      aes(c550, c950, label = rownames(loi_MB), color = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Traditional LOI: MB only, organic vs. carbonate content", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

# MB only biplot: TGA LOI
ggplot(tga_MB, 
      aes(c550, c950, label = rownames(tga_MB), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "TGA LOI: MB only, organic vs. carbonate content", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

# Color by context biplot: Trad LOI
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2, aes(shape = factor(type))) +
      geom_text_repel(size=3) + 
      labs(title = "Traditional LOI, soil included - color by context", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

# Color by type biplot: TGA
p1 <- ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(type))) +
      geom_point(size=2, aes(shape = factor(context))) +
      geom_text_repel(size=3) + 
      labs(color = "Type",
           shape = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text(), legend.position="left") 
p1

# Combining plots to the same figure
library(patchwork)
p2 <- ggplot(tga) + geom_boxplot(aes(c550, color = context)) + 
      theme(legend.position="left")
p3 <- ggplot(tga) + geom_boxplot(aes(c950, color = context), show.legend = FALSE) +
      coord_flip()

# Simple arrangement
p2 /
(p3 | p1)

# Prettier figure
theme_marginal_x <- theme(axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank())
theme_marginal_y <- theme(axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank())
wrap_plots(
  p2 + coord_cartesian(xlim = c(2, 6)) + theme_marginal_x,   
  plot_spacer(),  
  p1 + coord_cartesian(xlim = c(2, 6), ylim = c(10, 35)), 
  p3 + coord_flip(xlim = c(10, 35)) + theme_marginal_y,  
  nrow = 2,
  widths = c(1, 0.3),
  heights = c(0.4, 1)
)

Stacked barplots

# Trad LOI stacked barplot
ggplot(loi_long2, aes(x = reorder(sample, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Traditional LOI (MB only)", 
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

# TGA LOI stacked barplot
ggplot(tga_long2, aes(x = reorder(Name, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Thermogravimeter LOI (MB only)",
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_PP.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :62.38   Min.   : 7.27   Min.   :19.90  
##  1st Qu.:63.47   1st Qu.: 8.50   1st Qu.:21.36  
##  Median :64.19   Median : 9.34   Median :21.72  
##  Mean   :65.50   Mean   : 9.29   Mean   :21.92  
##  3rd Qu.:65.78   3rd Qu.:10.56   3rd Qu.:22.96  
##  Max.   :72.00   Max.   :10.90   Max.   :23.90
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

### Combined results

Combined PCA and HCA: including geochemistry and LOI data

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(plot3D); # 3D plots
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/combo_PP.xlsx", sep=";")

# Transforming remaining NA values to zeros for analysis
pxrf[is.na(pxrf)] <- 0
pxrf[4:16] <- scale(pxrf[4:16])

pxrf_MB <- pxrf[c(9:19), ]
rownames(pxrf_MB) <- pxrf_MB$Sample

PCA & HCA Combided geochemistry + LOI

# PCA analysis
pca_1 <- prcomp(pxrf_MB[4:13])
summary(pca_1)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.5575 1.2647 0.92468 0.76958 0.52297 0.37607 0.32250
## Proportion of Variance 0.6453 0.1578 0.08436 0.05843 0.02698 0.01395 0.01026
## Cumulative Proportion  0.6453 0.8031 0.88749 0.94592 0.97290 0.98686 0.99712
##                            PC8     PC9    PC10
## Standard deviation     0.16348 0.04660 0.01791
## Proportion of Variance 0.00264 0.00021 0.00003
## Cumulative Proportion  0.99975 0.99997 1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA (combined results) Palaepaphos elements")

autoplot(pca_1, data=pxrf_MB, colour='Area', shape = FALSE, label = TRUE,  main = "PCA (combined results) Palaepaphos grouped by area")

#3D plot
scores = as.data.frame(pca_1$x)
scatter3D(scores$PC2, scores$PC3, scores$PC1, 
          xlab = "PC2 (21,3 %)", ylab = "PC3 (8,4 %)", zlab="PC1 (60,8 %)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
text3D(scores$PC2, scores$PC3, scores$PC1, labels = rownames(scores),
          add = TRUE, colkey = FALSE, cex = 0.7)

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_MB %>%                         # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    highlight_branches_col

    area <- as.factor(pxrf_MB[, 2])
    n_area <- length(unique(area))
    cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
    col_area <- cols_a[area]
    labels_colors(dend) <- col_area[order.dendrogram(dend)]

    plot(dend, main="HCA with sample areas")
    rect.dendrogram(dend, 6, border = "Black", lty = 5)
    legend("topright", legend = levels(area), fill = cols_a)

Artaxata

Intro about the site

pXRF

From literature we can identify three rough classes of elements that could be used to analyze mudbricks from pXRF results:

Good: Rb, Sr, Y, Zr, Nb, Th, Pb, Cu, Zn, Fe
Neutral: V, Cr, Co, Ni, Ca, Ti
Bad: Na, P, Ba (+ Ca, Al)

The “bad” elements were excluded from the pXRF results from the get-go. After scaling and averaging the remaining values we ended up with two analysis datasets: “pxrf_all” that contains all the elements that have non-zero results, and “pxrf_no_na”, in which all the elements that have even one NA result are omitted.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr);
library(ggfortify); # PCA autoplots
library(FactoMineR); # Fast PCA graphs
library(dendextend); # Dendrograms

# Read the raw data
pxrf <- read.xlsx("data/pxrf_AA.xlsx", sep=";")

# Remove any rows containing words 'ERROR' or 'TEST'
pxrf <- filter(pxrf, !grepl('TEST', Sample))
pxrf <- filter(pxrf, !grepl('ERROR', Sample))

# Remove unused columns
pxrf <- pxrf %>% 
        select(-Site) %>% 
        select(-'Other;name') %>% 
        select(-'File;#') %>% 
        select(-DateTime) %>% 
        select(-Application) %>% 
        select(-Method) %>% 
        select(-ElapsedTime) %>% 
        select(-'Cal;Check')
  
# Coercing "< LOD" -results to NA:s  
pxrf[-c(1:3)] <- pxrf[-c(1:3)] %>% mutate_if(is.character,as.numeric)

# Transforming NA values to zeros
pxrf[is.na(pxrf)] <- 0

# Averaging results by "Sample", "Area" and "Type" columns
pxrf_averaged <- aggregate(pxrf, by = list(pxrf$Sample, pxrf$Area, pxrf$Type), FUN = mean)

# Removing old (now empty) columns
pxrf_averaged <- pxrf_averaged %>% select(-Sample)
pxrf_averaged <- pxrf_averaged %>% select(-Area)
pxrf_averaged <- pxrf_averaged %>% select(-Type)

# Assigning sample names as new row names 
rownames(pxrf_averaged) <- pxrf_averaged$'Group.1'
pxrf_averaged <- pxrf_averaged %>% select(-Group.1)

# Renaming the other two newly created groups back to "Type" and "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.2"] <- "Area"
names(pxrf_averaged)[names(pxrf_averaged) == "Group.3"] <- "Type"

# Removing error columns from the scaled analysis data set
pxrf_scaled <- pxrf_averaged %>% select(-ends_with("Err"))

# Scaling the data with standard z-score method
pxrf_scaled[-c(1:2)] <- scale(pxrf_scaled[-c(1:2)])

# Removing columns that only contain NA values
all_na <- function(x) any(!is.na(x))
pxrf_all <- pxrf_scaled %>% select_if(all_na)

pxrf_all: All possible elements pxrf_errors: Means of available values and error values for a quick glimpse

colnames(is.na(pxrf_all[-c(1:2)]))
##  [1] "MgO"   "Al2O3" "SiO2"  "P2O5"  "S"     "Cl"    "K2O"   "CaO"   "Ti"   
## [10] "V"     "Cr"    "Mn"    "Fe"    "Co"    "Ni"    "Cu"    "Zn"    "As"   
## [19] "Rb"    "Sr"    "Y"     "Zr"    "Nb"    "Ag"    "Sn"    "Ba"    "Ta"   
## [28] "Pb"    "Bi"
pxrf_errors <- pxrf_averaged %>% select_if(all_na)
pxrf_errors <- colMeans(pxrf_errors[-c(1:2)])
pxrf_errors <- (pxrf_errors * 1000)
pxrf_errors <- round(pxrf_errors, digits = 2)
pxrf_errors
##       MgO   MgO;Err     Al2O3 Al2O3;Err      SiO2  SiO2;Err      P2O5  P2O5;Err 
##   4069.21   1064.91   5230.61    191.43  23459.74    187.28    241.05     23.76 
##         S     S;Err        Cl    Cl;Err       K2O   K2O;Err       CaO   CaO;Err 
##   1093.58     15.93    356.63      6.54   1269.71     10.16   7609.01     21.24 
##        Ti    Ti;Err         V     V;Err        Cr    Cr;Err        Mn    Mn;Err 
##    311.59      4.03      2.22      1.58      1.55      1.07     49.88      2.67 
##        Fe    Fe;Err        Co    Co;Err        Ni    Ni;Err        Cu    Cu;Err 
##   2665.34     11.73      1.39      2.32      6.55      0.84      4.33      0.44 
##        Zn    Zn;Err        As    As;Err        Se    Se;Err        Rb    Rb;Err 
##      5.46      0.44      0.49      0.20      0.00      0.29      5.29      0.38 
##        Sr    Sr;Err         Y     Y;Err        Zr    Zr;Err        Nb    Nb;Err 
##     45.78      0.64      1.94      0.50     15.28      0.42      0.65      0.46 
##        Mo    Mo;Err        Rh    Rh;Err        Pd    Pd;Err        Ag    Ag;Err 
##      0.00      0.50      0.00      0.63      0.00      1.47      0.03      0.95 
##        Cd    Cd;Err        Sn    Sn;Err        Sb    Sb;Err        Ba    Ba;Err 
##      0.00      1.83      0.44     10.06      0.00      3.77     32.42     12.67 
##        La    La;Err        Ce    Ce;Err        Hf    Hf;Err        Ta    Ta;Err 
##      0.00     19.00      0.00      4.82      0.00      0.35      0.04      0.54 
##         W     W;Err        Pt    Pt;Err        Au    Au;Err        Hg    Hg;Err 
##      0.00      0.72      0.00      0.52      0.00      1.05      0.00      0.27 
##        Tl    Tl;Err        Pb    Pb;Err        Bi    Bi;Err        Th    Th;Err 
##      0.00      0.24      0.06      1.11      0.05      1.21      0.00      1.56 
##         U     U;Err 
##      0.00      3.85

pxrf_final: Final analysis data set with only the selected elements

# Dropping the dubious elements and elements with too many NA values
pxrf_final <- pxrf_all %>% 
        select(-Al2O3) %>% 
        select(-CaO) %>% 
        select(-SiO2) %>%
        select(-Cl) %>%  
        select(-K2O) %>% 
        select(-Ni) %>% 
        select(-P2O5) %>%  
        select(-S) %>% 
        select(-As) %>% 
        select(-Nb) %>% 
        select(-Cr)  %>% 
        select(-Ba) %>% 
        select(-MgO) %>% 
        select(-Cu) %>% 
        select(-Co) %>% 
        select(-Ag) %>% 
        select(-Sn) %>% 
        select(-Ta) %>% 
        select(-Pb) %>% 
        select(-Bi)

# Transforming remaining NA values to zeros for analysis
pxrf_final[is.na(pxrf_final)] <- 0

# Final analysis data set        
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V"  "Mn" "Fe" "Zn" "Rb" "Sr" "Y"  "Zr"

pXRF: K means cluster analysis

# PC autoplot with K-means clusters
set.seed(1)
autoplot(kmeans(pxrf_final[3:10], 4), data = pxrf_final, label = TRUE, label.size = 3)

pXRF: PCA

PCA with final elements:

# Elements
colnames(pxrf_final[-c(1:2)])
## [1] "Ti" "V"  "Mn" "Fe" "Zn" "Rb" "Sr" "Y"  "Zr"
# PCA analysis
pca_1 <- prcomp(pxrf_final[3:10])
summary(pca_1)
## Importance of components:
##                           PC1    PC2     PC3     PC4     PC5     PC6     PC7
## Standard deviation     2.3415 1.1244 0.80489 0.58457 0.43484 0.23991 0.12562
## Proportion of Variance 0.6853 0.1580 0.08098 0.04272 0.02364 0.00719 0.00197
## Cumulative Proportion  0.6853 0.8434 0.92434 0.96706 0.99069 0.99789 0.99986
##                            PC8
## Standard deviation     0.03365
## Proportion of Variance 0.00014
## Cumulative Proportion  1.00000
# PCA plots
biplot(pca_1, choices = 1:2, cex = c(1, 1.2), col = c("grey80", "deeppink2"), main = "PCA Artaxata elements")

autoplot(pca_1, data=pxrf_final, colour='Area', shape = FALSE, label = TRUE,  main = "PCA Artaxata grouped by area")

PCA(pxrf_final[3:10])

## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 10 individuals, described by 8 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"

pXRF: HCA

# HCA dendrogram, samples color coded by area:
dend <- 
    pxrf_final %>%                      # data
    dist %>%                            # calculate a distance matrix
    hclust(method = "ward.D2") %>%      # hierarchical clustering 
    as.dendrogram %>%                   # turn the object into a dendrogram
    highlight_branches_col

    area <- as.factor(pxrf_final[, 1])
    n_area <- length(unique(area))
    cols_a <- colorspace::rainbow_hcl(n_area, c = 70, l  = 50)
    col_area <- cols_a[area]
    labels_colors(dend) <- col_area[order.dendrogram(dend)]

    plot(dend, main="HCA with sample areas (Copper included)")
    rect.dendrogram(dend, 5, border = "Black", lty = 5)
    legend("topright", legend = levels(area), fill = cols_a)

Grain size analysis

Data preparation

# Libraries
library(dplyr); 
library(openxlsx); 
library(ggtern);
library(reshape2)

# Read and filter data
grain <- read.xlsx("data/grain_AA.xlsx", sep=";")

# Remove automatically created averages in order to include results from multiple runs for the same sample
grain <- grain %>% 
  filter(!grepl('Average', Sample)) %>% 
  filter(!grepl('test', Sample))

# Average data by "Sample", "Context" and "Type" columns from the original dataset
grain <- aggregate(grain, by=list(grain$Sample, grain$Context, grain$Type), FUN=mean)

# Remove the now empty columns for clarity
grain <- grain %>% select(-Sample)
grain <- grain %>% select(-Context)
grain <- grain %>% select(-Type)

# Assign sample names ("Group.1") as row names, and rename the other created columns back to "Type" and "Area" for clarity
rownames(grain) <- grain$Group.1
names(grain)[names(grain) == "Group.1"] <- "Sample"
names(grain)[names(grain) == "Group.2"] <- "Context"
names(grain)[names(grain) == "Group.3"] <- "Type"

# Scaling clay-silt-sand portions to values between 0 and 1
# (Otherwise the differences in low clay percentages are too small to differentiate)
normalize <- function(x, na.rm=TRUE){(x-min(x, na.rm=TRUE))/(max(x, na.rm=TRUE)-min(x, na.rm=TRUE))}

# Apply function "normalize" to clay, silt and sand columns
grain_01 <- as.data.frame(apply(grain[5:7], 2, normalize))

Ternary plots

# Ternary plots
ggtern(data=grain_01, aes(x=Clay, y=Sand, z=Silt, color=grain$Context)) +
  labs(title="Clay silt sand") +
  theme_rgbw() +
  theme_nomask() +
  geom_point(size=0) +
  geom_text(aes(label=rownames(grain_01)), size=3)

Line plots

# Reshaping data to long form
grain_long <- melt(grain[c(1,14:113)], id = "Sample") 

ggplot(grain_long, aes(x=variable, y=value, color=Sample, group=Sample)) +
    geom_line() +
    ggtitle("Grain size distribution curves")

# Stacked barplot
grain_long2 <- melt(grain[c(1,9:13)], id = "Sample") 

ggplot(grain_long2, aes(x = reorder(Sample, value), y = value, fill = variable)) +
  geom_bar(stat = 'identity', position = 'fill') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Loss on ignition

We did loss on ignition analysis both in the traditional manual method and with a modern TGA for comparison.

Data preparation

# Libraries
library(openxlsx); 
library(dplyr); 
library(ggplot2); 
library(ggrepel); # Preventing labels from overlapping
library(reshape2) # Melt-function for reshaping data to long form

loi <- read.xlsx("data/loi_AA.xlsx", sep=";")
tga <- read.xlsx("data/tga_AA.xlsx", sep=";")

# Remove rows containing words 'kekkila' (internal standard)
tga <- filter(tga, !grepl('kekkila', Name))

# TGA character columns to numeric
tga[, c(4:7)] <- sapply(tga[, c(4:7)], as.numeric)

# Sample names as rownames
rownames(loi) <- loi$sample
rownames(tga) <- tga$Name

# Creating new columns for analysis by subtracting crucible mass;
# "dry" (mass after drying), "c550" (mass after 550 C), "c950" (mass after 950 C)
loi$dry <- (loi$dry_weight - loi$crucible)
loi$c550mass <- (loi$after_550_C - loi$crucible)
loi$c950mass <- (loi$after_950_C - loi$crucible)

loi$c550 <- (loi$dry - loi$c550mass)/(loi$dry) * 100
loi$c950 <- (loi$c550mass - loi$c950mass)/(loi$c550mass) * 100

# Same with TGA
tga$c550 <- (tga$Dry - tga$Mass_550)/(tga$Dry) * 100
tga$c950 <- (tga$Mass_550 - tga$Mass_950)/(tga$Mass_550) * 100

# Transforming data to long form 
loi_long <- loi[c(8,13:14)]
loi_long <- melt(loi_long, id = "context") 

tga_long <- tga[c(8,10:11)]
tga_long <- melt(tga_long, id = "context") 

# Long form data for stacked barplots
loi_long2 <- melt(loi[c(1,13:14)], value.name = "Temp", id= "sample") 

tga_long2 <- melt(tga[c(1,10:11)], value.name = "Temp", id= "Name")

LOI dataset:

loi[c(8:9,13:14)]
##               context     type     c550     c950
## AA-1          Hill II mudbrick 5.610986 5.756990
## AA-2          Hill II mudbrick 4.711074 7.965070
## AA-3          Hill II mudbrick 4.035368 7.841600
## AA-4    Urartian silo mudbrick 3.080904 7.738517
## AA-5          Phase I mudbrick 2.940399 6.257292
## AA-6          Phase I mudbrick 3.125140 5.507166
## AA-7  Phase II or III mudbrick 4.573474 6.267972
## AA-8  Phase II or III mudbrick 3.769098 7.238074
## AA-9         Phase II mudbrick 3.696483 6.966079
## AA-10        Phase II mudbrick 5.181930 5.580024

TGA dataset:

tga[c(8:9,10:11)]
##                  context     type     c550     c950
## AA-1             Hill II mudbrick 5.811508 5.611934
## AA-2             Hill II mudbrick 4.751337 8.599873
## AA-3             Hill II mudbrick 4.078462 7.997570
## AA-4       Urartian silo mudbrick 3.403442 7.086302
## AA-4 (2)   Urartian silo mudbrick 3.415174 7.167431
## AA-5             Phase I mudbrick 2.799218 6.532557
## AA-6             Phase I mudbrick 3.118409 4.097341
## AA-7     Phase II or III mudbrick 4.395924 6.082521
## AA-8     Phase II or III mudbrick 3.689788 6.722017
## AA-9            Phase II mudbrick 4.417015 6.467449
## AA-10           Phase II mudbrick 4.307988 6.154440

Boxplots

# Colored boxplots: Trad LOI
ggplot(loi_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Traditional LOI",
       color = "Context",
       x ="Temperature", 
       y = "LOI")

# Colored boxplots: TGA LOI
ggplot(tga_long, aes(x = variable, y = value, color = context)) +  
  geom_boxplot() +
  labs(title="Thermogravimeter LOI",
       color = "Context",
       x ="Temperature", 
       y = "LOI")

Biplots

# Color by context biplot: Trad LOI
ggplot(loi, 
      aes(c550, c950, label = rownames(loi), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Traditional LOI - color by context", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

# Color by context biplot: TGA
ggplot(tga, 
      aes(c550, c950, label = rownames(tga), colour = factor(context))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "TGA LOI - color by context", 
           color = "Context",
           x = "Organic LOI (%)", 
           y = "Carbonate LOI (%)") +
      theme(axis.title = element_text())

Stacked barplots

# Trad LOI stacked barplot
ggplot(loi_long2, aes(x = reorder(sample, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Traditional LOI", 
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

# TGA LOI stacked barplot
ggplot(tga_long2, aes(x = reorder(Name, Temp), y = Temp, fill = variable)) +
  geom_bar(stat = 'identity', position = 'stack') + 
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
    labs(title="Thermogravimeter LOI",
         fill="Temperature",
         x = "", 
         y = "LOI (%)")

Colorimetry

# Libraries
library(dplyr); 
library(ggplot2); 
library(ggrepel);
library(rgl);
library(plot3D);
library(colordistance)

color1 <- read.table("data/color_AA.txt", header = TRUE, sep = "\t", dec = ",")
rownames(color1) <- color1$Data.Name
color <- subset(color1[, 3:5])
summary(color)
##        L               a               b        
##  Min.   :51.37   Min.   :3.880   Min.   :15.92  
##  1st Qu.:57.73   1st Qu.:4.402   1st Qu.:16.49  
##  Median :58.79   Median :4.555   Median :16.93  
##  Mean   :58.18   Mean   :5.067   Mean   :17.32  
##  3rd Qu.:59.58   3rd Qu.:4.695   3rd Qu.:17.32  
##  Max.   :61.57   Max.   :8.670   Max.   :21.47
ggplot(color, 
      aes(a, b, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) + 
      labs(title = "Color (a*b)") +
      xlab("a D65") +
      ylab("b D65") 

ggplot(color1,
      aes(X, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "Luminance") +
      ylab("L* (D65)") +
      xlab("Sample number") 

ggplot(color,
      aes(a, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "A versus luminance") +
      ylab("L* (D65)") +
      xlab("a") 

ggplot(color,
      aes(b, L, label = rownames(color))) +
      geom_point(size=2) +
      geom_text_repel(size=3) +     
      labs(title = "B versus luminance") +
      ylab("L* (D65)") +
      xlab("b") 

# Colorful 3D plot with drop lines
scatter3D(color$a, color$b, color$L, 
          xlab = "a* (D65)", ylab = "b* (D65)", zlab="L* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$a, color$b, color$L,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)

scatter3D(color$L, color$a, color$b, 
          xlab = "L* (D65)", ylab = "a* (D65)", zlab="b* (D65)",
          phi = 0, bty = "g",  type = "h", 
          ticktype = "detailed", pch = 19, cex = 1.2)
          text3D(color$L, color$a, color$b,  labels = rownames(color),
          add = TRUE, colkey = FALSE, cex = 0.7)